package tm.hlta

import scala.io.Source
import tm.util.Tree
import tm.util.TreeList
import org.json4s._
import java.util.ArrayList
import java.io.PrintWriter

/**
 * A word, optionally paired with a probability.
 *
 * @param w the text of the word
 * @param probability the probability attached to the word, if there is one
 */
class Word(val w: String, val probability: Option[Double]) {

  /**
   * Renders the bare word when no probability is present; otherwise the word
   * followed by the probability (three decimals) in parentheses.
   */
  override def toString = probability.fold(w) { p => f"$w ($p%.03f)" }

  /** Returns the text of the word. */
  def toVarName() = w
}

/** Factory methods for [[Word]]. */
object Word {

  /** Creates a word that carries no probability. */
  def apply(w: String): Word = {
    val noProbability = Option.empty[Double]
    new Word(w, noProbability)
  }

  /** Creates a word that carries the probability `p`. */
  def apply(w: String, p: Double): Word = {
    val probability = Some(p)
    new Word(w, probability)
  }
}

/**
 * A topic: a name and a sequence of words, with an optional level, size and MI.
 * All fields are mutable.
 */
class Topic(var name: String, var words: Seq[Word], var level: Option[Int], var size: Option[Double], var mi: Option[Double]){

  /**
   * Builds the jstree node content for this topic.
   *
   * The label is the space-separated words, prefixed by the size (three decimals)
   * when `showPercentageInLabel` is set and a size is defined. The data map always
   * contains "name", plus "level", "percentage" and "mi" for whichever of those
   * values are defined.
   *
   * @param showPercentageInLabel whether the size should be prepended to the label
   * @return a JstreeWriter.Node created from the topic name, the label and the data map
   */
  def defaultContent(showPercentageInLabel: Boolean = true): JstreeWriter.Node = {
    val wordList = words.mkString(" ")
    val label = size match {
      case Some(percentage) if showPercentageInLabel => f"$percentage%.3f $wordList"
      case _ => wordList
    }

    // Entries are appended in a fixed order: name, level, percentage, mi.
    val optionalEntries = Seq[(String, Option[Any])]("level" -> level, "percentage" -> size, "mi" -> mi)
    val data = optionalEntries.foldLeft(Map[String, Any]("name" -> name)) {
      case (acc, (key, Some(value))) => acc + (key -> value)
      case (acc, _) => acc
    }

    JstreeWriter.Node(name, label, data)
  }
}

/** Factory methods for [[Topic]]. */
object Topic{

  /** Creates a topic from plain words; level, size and MI are left undefined. */
  def apply(name: String, words: Seq[String]): Topic = {
    val plainWords = words.map(Word(_))
    new Topic(name, plainWords, None, None, None)
  }

  /** Creates a topic from plain words at the given level; size and MI are left undefined. */
  def apply(name: String, level: Int, words: Seq[String]): Topic = {
    val plainWords = words.map(Word(_))
    new Topic(name, plainWords, Some(level), None, None)
  }

  /** Creates a topic with a level, a percentage (stored as the topic size) and an optional MI. */
  def apply(name: String, level: Int, percentage: Double, mi: Option[Double], words: Seq[Word]): Topic = {
    val definedLevel = Some(level)
    val definedSize = Some(percentage)
    new Topic(name, words, definedLevel, definedSize, mi)
  }

}

/** Factory methods and file readers for [[TopicTree]]. */
object TopicTree{

  /** Wraps the given root trees in a TopicTree. */
  def apply(topicTree: Seq[Tree[Topic]]): TopicTree = new TopicTree(topicTree)

  /** Wraps the roots of the given tree list in a TopicTree. */
  def apply(topicTree: TreeList[Topic]): TopicTree = new TopicTree(topicTree.roots)

  /**
   *  Reads topics generated by src/main/java/clustering/HLTAOutputTopics_html_Ltm
   */
  def readHtml(topicsFile: String): TopicTree = HTMLTopicTable.readTopicTree(topicsFile)

  /**
   * Reads a topic tree from a JSON file holding a list of nodes, each with an
   * `id`, a `text`, optional `data` and a list of `children`.
   *
   * The words of a topic are the space-separated tokens of the node text, except
   * tokens starting with a digit and empty tokens, which are dropped. The topic
   * level is taken from the node data when present.
   *
   * NOTE(review): `percentage` and `mi` are parsed from the node data but are not
   * carried over to the resulting topics.
   *
   * @param fileName path of the JSON file to read
   * @return the topic tree whose roots are the top-level nodes of the file
   */
  def readJson(fileName: String): TopicTree = {

    case class Data(name: String, level: Option[Int], percentage: Option[Double], mi: Option[Double])
    case class Node(id: String, text: String, data: Option[Data], children: List[Node])
    import org.json4s.native.JsonMethods._

    implicit val formats = DefaultFormats

    // Read the whole file eagerly and always release the file handle.
    val source = Source.fromFile(fileName)
    val jsonString = try source.mkString finally source.close()
    val entries = parse(jsonString).extract[List[Node]]

    def _constructTree(node: Node): Tree[Topic] = {
      val children = node.children.map(_constructTree)
      // Splitting on single spaces yields empty tokens for empty text or repeated
      // spaces; guard against them before looking at the first character.
      val text = node.text.split(" ").filter { x => x.nonEmpty && !x.charAt(0).isDigit }
      val topic = node.data.flatMap(_.level) match {
        case Some(level) => Topic(node.id, level, text)
        case None => Topic(node.id, text)
      }
      Tree.node(topic, children)
    }

    new TopicTree(entries.map(_constructTree))
  }

}

/**
 * A list of topic trees with topic-specific trimming, sorting and export helpers.
 *
 * @param roots the root trees of this topic tree
 */
class TopicTree(roots: Seq[Tree[Topic]]) extends TreeList[Topic](roots){

  /**
   * Trims the tree to the requested levels.
   *
   * Each requested level must lie in (-height, height]. Positive values are passed
   * on unchanged; zero and negative values are shifted by the tree height first
   * (0 becomes `height`, -1 becomes `height - 1`, and so on).
   *
   * @param takeLevels the levels to keep
   * @return a new TopicTree built from the roots of the trimmed tree
   * @throws IllegalArgumentException if a requested level is out of range
   */
  override def trimLevels(takeLevels: List[Int]): TopicTree = {
    val height = super.height()
    val normalizedLevels = takeLevels.map { l =>
      if (l > height)
        throw new IllegalArgumentException(s"invalid layer number, larger than the possible height $height")
      else if (l <= -height)
        throw new IllegalArgumentException(s"invalid layer number, lower than the leaf height ${-height} (if root is 0)")
      else if (l <= 0) l + height
      else l
    }
    TopicTree(super.trimLevels(normalizedLevels).roots)
  }

  /** Sorts the roots by the given key function and wraps the result in a TopicTree. */
  override def sortRoots[B](f: Tree[Topic] => B)(implicit ord: Ordering[B]): TopicTree = TopicTree(super.sortRoots(f))

  /** Overwrites the level of every topic with the value looked up from `findLevels()`. */
  def reassignLevel(): Unit = {
    val levelLookup = findLevels()
    foreach { topic => topic.level = levelLookup.get(topic) }
  }

  /** Writes the trees as a JavaScript variable named `jsVarName`; labels include the topic size. */
  def saveAsJs(outputFile: String, jsVarName: String = "nodes") =
    JstreeWriter.writeJs[Topic](roots, outputFile, jsVarName, (topic => topic.defaultContent(showPercentageInLabel = true)))

  /** Writes the trees as JSON; labels do not include the topic size. */
  def saveAsJson(outputFile: String) =
    JstreeWriter.writeJson[Topic](roots, outputFile, (topic => topic.defaultContent(showPercentageInLabel = false)))

  /**
   *  Output html is not the same as the one generated by src/main/java/clustering/HLTAOutputTopics_html_Ltm
   */
  def saveAsSimpleHtml(outputFile: String) =
    JstreeWriter.writeSimpleHtml[Topic](roots, outputFile, (topic => topic.defaultContent(showPercentageInLabel = true)))

}

/**
 * Reads topic trees from an HTML topic table: the `<p ...>` lines found between
 * a `<div class="div">` line and the following `</div>` line.
 */
object HTMLTopicTable {

  /** A topic read from the HTML table; it additionally records the text indent of its line. */
  class HTMLTopic(name: String, level: Int,
    val indent: Int, size: Double, mi: Option[Double], words: Seq[Word])
      extends Topic(name, words, Some(level), Some(size), mi)

  // Matches one topic line. Groups, in order: level, name, parent, optional
  // percentage attribute, optional MI attribute, text indent, the number leading
  // the paragraph text, and the remaining paragraph text (the words).
  val lineRegex = """<p level ="([^"]*)" name ="([^"]*)" parent = "([^"]*)" (?:percentage ="([^"]*)" )?(?:MI = "([^"]*)" )?style="text-indent:(.+?)em;"> ([.0-9]+) (.*?)</p>""".r
  // Matches word text that consists solely of "word probability" pairs.
  val wordsWithProbRegex = """\s*(([^. ]+) ([01]\.[0-9]+)\s*)*""".r

  /** Reads the topic table file and builds the topic tree rooted at the top-level topics. */
  def readTopicTree(topicTableFile: String): TopicTree = {
    val topics = readTopics(topicTableFile)
    val ltmTrees = buildLTMTrees(topics)
    TopicTree(buildTopicTree(ltmTrees))
  }

  /**
   *  Reads topics and their parents from the specified file.  It returns a list
   *  in which each element is a pair of topic and its parent.
   *
   *  A line inside the table that does not match `lineRegex` raises a MatchError.
   */
  def readTopics(topicsFile: String): List[(HTMLTopic, String)] = {
    val input = Source.fromFile(topicsFile)
    try {
      input.getLines()
        .dropWhile(_ != """<div class="div">""")
        .drop(1) // drop the above <div> line
        .takeWhile(_ != """</div>""")
        .map {
          // The optional percentage attribute (4th group) is ignored; the size is
          // taken from the number that leads the paragraph text.
          case lineRegex(level, name, parent, _, mi, indent, percentage, words) =>
            // An unmatched optional group is null; Option(...) turns it into None.
            val miDouble = Option(mi).map(_.toDouble)
            val topic = new HTMLTopic(name = name, level = level.toInt, indent = indent.toInt,
              size = percentage.toDouble, mi = miDouble, words = parseWords(words))
            (topic, parent)
        }
        .toList // force the lazy iterator before the file is closed
    } finally {
      input.close()
    }
  }

  /**
   * Parses the word text of a topic line, either as "word probability" pairs or
   * as plain words. Leading whitespace and empty tokens are discarded so that
   * empty or padded word text yields no spurious entries.
   */
  private def parseWords(words: String): Vector[Word] = {
    val tokens = words.trim.split("\\s+").filter(_.nonEmpty).toVector
    words match {
      case wordsWithProbRegex(_*) =>
        tokens.grouped(2).map(xs => Word(xs(0), xs(1).toDouble)).toVector
      case _ => tokens.map(Word(_))
    }
  }

  /**
   * Links topics to their parents and returns the trees rooted at the topics
   * whose parent is "none".
   */
  def buildLTMTrees(topics: List[(Topic, String)]): List[Tree[Topic]] = {
    // parent name -> topics that name it as their parent
    val topicToChildrenMap = topics.groupBy(_._2).map {
      case (parent, childPairs) => (parent, childPairs.map(_._1))
    }

    /**
     * Constructs a tree of topics based on the latent tree model.  In this
     * tree, the top level nodes may contain a child of the same level.
     */
    def constructLTMTree(topic: Topic): Tree[Topic] =
      topicToChildrenMap.get(topic.name) match {
        case Some(children) =>
          new Tree(topic, children.map(constructLTMTree))
        case None => Tree.leaf(topic)
      }

    topics.collect { case (topic, "none") => constructLTMTree(topic) }
  }

  /**
   * Builds topic trees such that all top-level topics are used as roots.
   * Returns an empty list when there are no input trees.
   */
  def buildTopicTree(ltmTrees: List[Tree[Topic]]): List[Tree[Topic]] = {
    if (ltmTrees.isEmpty) Nil
    else {
      // find all top-level topics
      val topLevel = ltmTrees.map(_.value.level).max
      val topLevelTrees = ltmTrees.flatMap(_.findSubTrees { _.level == topLevel })

      // Filter away children of the same level.  This happens when the top level
      // topics are connected as a tree.
      // NOTE(review): assumes every topic has a defined level, as topics created
      // through HTMLTopic do; an undefined level fails on `.get`.
      def filterSameLevelChildren(tree: Tree[Topic]): Tree[Topic] = {
        val v = tree.value
        new Tree(v, tree.children.filter(_.value.level.get < v.level.get)
          .map(filterSameLevelChildren(_)))
      }

      topLevelTrees.map(filterSameLevelChildren)
    }
  }
}

  
//object JavascriptTopicTable {
//
//  class JavascriptTopic(id: String, words: Seq[Word], name: Option[String], level: Option[Int] = None,
//      percentage: Option[Double] = None, mi: Option[Double], children: Seq[JavascriptTopic])
//      extends Topic(id, level, percentage, mi, words)
//
//  //"""<p level ="([^"]*)" name ="([^"]*)" parent = "([^"]*)" (?:percentage ="([^"]*)" )?(?:MI = "([^"]*)" )?style="text-indent:(.+?)em;"> ([.0-9]+) (.*?)</p>""".r
//  
//  val lineRegex = """[ ]*id: "([^"]*)", text: "0.138 ([^"]*)", data: {[ ]*(?:name: "([^"]*)")?[, ]*(?:level: "([^"]*)")?[, ]*(?:percentage: "([^"]*)")?[, ]*(?:mi: "([^"]*)")? }, children: \[""".r
//  val wordsWithProbRegex = """\s*(([^ ]+) ([.0-9]+)\s*)*""".r
//
//  def readTopicTree(topicTableFile: String): TopicTree = {
//    val topics = readTopics(topicTableFile)
//    val ltmTrees = buildLTMTrees(topics)
//    TopicTree(buildTopicTree(ltmTrees))
//  }
//
//  /**
//   *  Reads topics and their parents from the specified file.  It returns a list
//   *  in which each element is a pair of topic and its parent.
//   */
//  def readTopics(topicsFile: String): List[(JavascriptTopic, String)] = {
//    val input = Source.fromFile(topicsFile)
//    val lines = input.getLines
//      .dropWhile(_ != """var nodes = [{""")
//      .takeWhile(_ != """];""")
//    val parent = 
//
//    try {
//      lines.map {
//        _ match {
//          case lineRegex(id, text, name, level, percentage, mi, children) =>
//            val miDouble = if (mi == null) None else Some(mi.toDouble)
//            val ws = text match {
//              case wordsWithProbRegex(_*) =>
//                text.split("\\s+").grouped(2)
//                  .map(xs => Word(xs(0), xs(1).toDouble)).toVector
//              case _ => text.split("\\s+").map(Word.apply).toVector
//            }
//            (new JavascriptTopic(id = id, words = ws, name = name, level = level,
//              percentage = percentage, mi = mi), parent)
//        }
//      }.toList
//    } finally {
//      input.close
//    }
//  }
//
//  def buildLTMTrees(topics: List[(Topic, String)]): List[Tree[Topic]] = {
//    val topicToChildrenMap = topics.groupBy(_._2).map {
//      _ match {
//        case (parent, childPairs) => (parent, childPairs.map(_._1))
//      }
//    }
//
//    /**
//     * Constructs a tree of topics based on the latent tree model.  In this
//     * tree, the top level nodes may contain a child of the same level.
//     */
//    def constructLTMTree(topic: Topic): Tree[Topic] =
//      topicToChildrenMap.get(topic.name) match {
//        case Some(children) =>
//          new Tree(topic, children.map(constructLTMTree))
//        case None => Tree.leaf(topic)
//      }
//
//    val roots = topics.filter(_._2 == "none").map(_._1)
//    roots.map(constructLTMTree)
//  }
//
//  /**
//   * Builds topic trees such that all top-level topics are used as roots.
//   */
//  def buildTopicTree(ltmTrees: List[Tree[Topic]]): List[Tree[Topic]] = {
//    // find all top-level topics
//    val topLevel = ltmTrees.map(_.value.level).max
//    val topLevelTrees = ltmTrees.flatMap(_.findSubTrees { _.level == topLevel })
//
//    // Filter away children of the same level.  This happens when the top level
//    // topics are connected as a tree.
//    def filterSameLevelChildren(tree: Tree[Topic]): Tree[Topic] = {
//      import tree.{ value => v }
//      new Tree(v, tree.children.filter(_.value.level.get < v.level.get)
//        .map(filterSameLevelChildren(_)))
//    }
//
//    topLevelTrees.map(filterSameLevelChildren)
//  }
//
//}